{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "89e62937",
   "metadata": {},
   "source": [
    "# Subjects and objects"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "574d6a36",
   "metadata": {},
   "outputs": [],
   "source": [
    "%run -i \"../util/file_utils.ipynb\"\n",
    "%run -i \"../util/lang_utils.ipynb\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8b646209",
   "metadata": {},
   "source": [
    "# Get subject phrase"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "3857bdee",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_subject_phrase(doc):\n",
    "    for token in doc:\n",
    "        if (\"subj\" in token.dep_):\n",
    "            subtree = list(token.subtree)\n",
    "            start = subtree[0].i\n",
    "            end = subtree[-1].i + 1\n",
    "            return doc[start:end]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d2eb21c4",
   "metadata": {},
   "source": [
    "# Get object phrase"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "9ce4e810",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_object_phrase(doc):\n",
    "    for token in doc:\n",
    "        if (\"dobj\" in token.dep_):\n",
    "            subtree = list(token.subtree)\n",
    "            start = subtree[0].i\n",
    "            end = subtree[-1].i + 1\n",
    "            return doc[start:end]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "0a0a01f3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The big black cat stared at the small dog.\n",
      "\tSubject: The big black cat\n",
      "\tDirect object: None\n",
      "Jane watched her brother in the evenings.\n",
      "\tSubject: Jane\n",
      "\tDirect object: her brother\n",
      "Laura gave Sam a very interesting book.\n",
      "\tSubject: Laura\n",
      "\tDirect object: a very interesting book\n"
     ]
    }
   ],
   "source": [
    "sentences = [\n",
    "    \"The big black cat stared at the small dog.\", \n",
    "    \"Jane watched her brother in the evenings.\", \n",
    "    \"Laura gave Sam a very interesting book.\"\n",
    "]\n",
    "for sentence in sentences:\n",
    "    doc = small_model(sentence)\n",
    "    subject_phrase = get_subject_phrase(doc)\n",
    "    object_phrase = get_object_phrase(doc)\n",
    "    print(sentence)\n",
    "    print(\"\\tSubject:\", subject_phrase)\n",
    "    print(\"\\tDirect object:\", object_phrase)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c63ab78a",
   "metadata": {},
   "source": [
    "# Get dative phrase"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "f89ab9f1",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_dative_phrase(doc):\n",
    "    for token in doc:\n",
    "        if (\"dative\" in token.dep_):\n",
    "            subtree = list(token.subtree)\n",
    "            start = subtree[0].i\n",
    "            end = subtree[-1].i + 1\n",
    "            return doc[start:end]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "16019f63",
   "metadata": {},
   "source": [
    "# Combined function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "0fd408de",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_phrase(doc, phrase):\n",
    "    # phrase is one of \"subj\", \"obj\", \"dative\"\n",
    "    for token in doc:\n",
    "        if (phrase in token.dep_):\n",
    "            subtree = list(token.subtree)\n",
    "            start = subtree[0].i\n",
    "            end = subtree[-1].i + 1\n",
    "            return doc[start:end]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4d7699a6",
   "metadata": {},
   "source": [
    "# Get prepositional phrases"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "9af10562",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_prepositional_phrase_objs(doc):\n",
    "    prep_spans = []\n",
    "    for token in doc:\n",
    "        if (\"pobj\" in token.dep_):\n",
    "            subtree = list(token.subtree)\n",
    "            start = subtree[0].i\n",
    "            end = subtree[-1].i + 1\n",
    "            prep_spans.append(doc[start:end])\n",
    "    return prep_spans"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "3738acc6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Laura gave Sam a very interesting book.\n",
      "\tSubject: Laura\n",
      "\tDirect object: a very interesting book\n",
      "\tDative object: Sam\n"
     ]
    }
   ],
   "source": [
    "sentence = \"Laura gave Sam a very interesting book.\"\n",
    "doc = small_model(sentence)\n",
    "subject_phrase = get_phrase(doc, \"subj\")\n",
    "object_phrase = get_phrase(doc, \"obj\")\n",
    "dative_phrase = get_phrase(doc, \"dative\")\n",
    "print(sentence)\n",
    "print(\"\\tSubject:\", subject_phrase)\n",
    "print(\"\\tDirect object:\", object_phrase)\n",
    "print(\"\\tDative object:\", dative_phrase)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "87f90e2e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The big black cat stared at the small dog.\n",
      "\tSubject: The big black cat\n",
      "\tDirect object: the small dog\n",
      "\tPrepositional phrases: [the small dog]\n",
      "Jane watched her brother in the evenings.\n",
      "\tSubject: Jane\n",
      "\tDirect object: her brother\n",
      "\tPrepositional phrases: [the evenings]\n"
     ]
    }
   ],
   "source": [
    "sentences = [\n",
    "    \"The big black cat stared at the small dog.\", \n",
    "    \"Jane watched her brother in the evenings.\"\n",
    "]\n",
    "for sentence in sentences:\n",
    "    doc = small_model(sentence)\n",
    "    subject_phrase = get_phrase(doc, \"subj\")\n",
    "    object_phrase = get_phrase(doc, \"obj\")\n",
    "    dative_phrase = get_phrase(doc, \"dative\")\n",
    "    prepositional_phrase_objs = get_prepositional_phrase_objs(doc)\n",
    "    print(sentence)\n",
    "    print(\"\\tSubject:\", subject_phrase)\n",
    "    print(\"\\tDirect object:\", object_phrase)\n",
    "    print(\"\\tPrepositional phrases:\", prepositional_phrase_objs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9cd9c36e",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
